home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
SGI Freeware 2002 November
/
SGI Freeware 2002 November - Disc 2.iso
/
dist
/
fw_glimpse.idb
/
usr
/
freeware
/
src
/
glimpse-3.0
/
index
/
utils.c.z
/
utils.c
Wrap
C/C++ Source or Header
|
1997-09-09
|
8KB
|
377 lines
#include "glimpse.h"
/*
 * Escape a 4-bit partition number.
 * n is guaranteed to be < MaxNum4bPartition.
 * The values '\0' and '\n' are remapped to MaxNum4bPartition and
 * MaxNum4bPartition+1 respectively; every other value is returned as is.
 * (Presumably this keeps NUL and newline out of the encoded data -- confirm
 * against the index format.)
 */
int
encode4b(n)
int n;
{
	switch (n) {
	case '\0':
		return MaxNum4bPartition;
	case '\n':
		return MaxNum4bPartition+1;
	default:
		return n;
	}
}
/*
 * Inverse of encode4b(): MaxNum4bPartition maps back to '\0',
 * MaxNum4bPartition+1 maps back to '\n', anything else is unchanged.
 */
int
decode4b(n)
int n;
{
	int	value = n;

	if (n == MaxNum4bPartition)
		value = '\0';
	else if (n == MaxNum4bPartition+1)
		value = '\n';
	return value;
}
/*
 * Escape an 8-bit partition number.
 * n is guaranteed to be < MaxNum8bPartition.
 * The values '\0' and '\n' are remapped to MaxNum8bPartition and
 * MaxNum8bPartition+1 respectively; every other value is returned as is.
 */
int
encode8b(n)
int n;
{
	switch (n) {
	case '\0':
		return MaxNum8bPartition;
	case '\n':
		return MaxNum8bPartition+1;
	default:
		return n;
	}
}
/*
 * Inverse of encode8b(): MaxNum8bPartition maps back to '\0',
 * MaxNum8bPartition+1 maps back to '\n', anything else is unchanged.
 */
int
decode8b(n)
int n;
{
	int	value = n;

	if (n == MaxNum8bPartition)
		value = '\0';
	else if (n == MaxNum8bPartition+1)
		value = '\n';
	return value;
}
/*
 * Encode a 12-bit partition number.
 * n is guaranteed to be < MaxNum12bPartition.
 * n is split into a quotient and remainder modulo MaxNum8bPartition; the
 * quotient is escaped with encode4b(), the remainder with encode8b(), and
 * the two results are packed as (high << 8) | low.
 */
int
encode12b(n)
int n;
{
	unsigned char	hi = encode4b((unsigned char)(n / MaxNum8bPartition));
	unsigned char	lo = encode8b((unsigned char)(n % MaxNum8bPartition));

	return (hi<<8)|lo;
}
/*
 * Inverse of encode12b(): bits 8-11 of n are decoded with decode4b(),
 * bits 0-7 with decode8b(), and the value is rebuilt as
 * high * MaxNum8bPartition + low.
 */
int
decode12b(n)
int n;
{
	unsigned char	hi = decode4b((unsigned char)((n&0x00000f00) >> 8));
	unsigned char	lo = decode8b((unsigned char)(n&0x000000ff));

	return (hi * MaxNum8bPartition) + lo;
}
/*
 * Encode a 16-bit partition number.
 * n is guaranteed to be < MaxNum16bPartition.
 * n is split into a quotient and remainder modulo MaxNum8bPartition; each
 * half is escaped with encode8b() and the results are packed as
 * (high << 8) | low.
 */
int
encode16b(n)
int n;
{
	unsigned char	hi = encode8b((unsigned char)(n / MaxNum8bPartition));
	unsigned char	lo = encode8b((unsigned char)(n % MaxNum8bPartition));

	return (hi<<8)|lo;
}
/*
 * Inverse of encode16b(): bits 8-15 and bits 0-7 of n are each decoded with
 * decode8b(), and the value is rebuilt as high * MaxNum8bPartition + low.
 */
int
decode16b(n)
int n;
{
	unsigned char	hi = decode8b((unsigned char)((n&0x0000ff00) >> 8));
	unsigned char	lo = decode8b((unsigned char)(n&0x000000ff));

	return (hi * MaxNum8bPartition) + lo;
}
/*
 * Encode a 24-bit partition number.
 * n is guaranteed to be < MaxNum24bPartition.
 * n is split into a quotient and remainder modulo MaxNum16bPartition; the
 * quotient is escaped with encode8b(), the remainder with encode16b(), and
 * the results are packed as (high << 16) | low.
 */
int
encode24b(n)
int n;
{
	unsigned short	hi = encode8b((unsigned short)(n / MaxNum16bPartition));
	unsigned short	lo = encode16b((unsigned short)(n % MaxNum16bPartition));

	return (hi<<16)|lo;
}
/*
 * Inverse of encode24b(): bits 16-23 of n are decoded with decode8b(),
 * bits 0-15 with decode16b(), and the value is rebuilt as
 * high * MaxNum16bPartition + low.
 */
int
decode24b(n)
int n;
{
	unsigned short	hi = decode8b((unsigned short)((n&0x00ff0000) >> 16));
	unsigned short	lo = decode16b((unsigned short)(n&0x0000ffff));

	return (hi * MaxNum16bPartition) + lo;
}
/*
 * Encode a 32-bit partition number.
 * n is guaranteed to be < MaxNum32bPartition.
 * n is split into a quotient and remainder modulo MaxNum16bPartition; each
 * half is escaped with encode16b() and the results are packed as
 * (high << 16) | low.
 */
int
encode32b(n)
int n;
{
	unsigned short msb, lsb;

	msb = (n / MaxNum16bPartition);
	lsb = (n % MaxNum16bPartition);
	msb = encode16b(msb);
	lsb = encode16b(lsb);
	/*
	 * msb is promoted to (signed) int before shifting; when its top bit is
	 * set, "msb << 16" overflows a 32-bit int, which is undefined behaviour.
	 * Shift in unsigned arithmetic and convert the packed word back to int,
	 * which yields the same bit pattern the old code produced in practice.
	 * (Assumes int is at least 32 bits wide, as the 32-bit packing already did.)
	 */
	return (int)(((unsigned int)msb << 16) | lsb);
}
/*
 * Inverse of encode32b(): the upper and lower 16 bits of n are each decoded
 * with decode16b(), and the value is rebuilt as
 * high * MaxNum16bPartition + low.
 */
int
decode32b(n)
int n;
{
	unsigned short	hi = decode16b((unsigned short)((n&0xffff0000) >> 16));
	unsigned short	lo = decode16b((unsigned short)(n&0x0000ffff));

	return (hi * MaxNum16bPartition) + lo;
}
/*
 * Returns non-zero if c is one of the agrep-special characters that must be
 * masked with a backslash when it appears in a file name.  '^' and '$' are
 * deliberately not in this set: they are passed through uninterpreted.
 */
static int
needs_agrep_escape(c)
int c;
{
	switch (c) {
	case '-': case ',': case ';': case '.': case '#': case '|':
	case '[': case ']': case '(': case ')': case '>': case '<':
	case '+': case '{': case '}': case '~':
		return 1;
	default:
		return 0;
	}
}

/*
 * Converts, in place, a file-name pattern of length len held in buf into an
 * agrep regular expression.
 *
 * An unescaped '*' at the very end and a '*' at the very beginning are
 * dropped first.  Then:
 *   - if nothing is left, buf becomes ".*" and -2 is returned;
 *   - if the name contains no unescaped '?', '*', '$' or '^', it is left as
 *     it is (apart from the dropped '*'s) and its +ve length is returned, so
 *     that memagrep calls can be avoided;
 *   - otherwise '?' becomes '.', '*' becomes '.*', backslash-escaped pairs
 *     are copied verbatim, ALL OTHER agrep-special characters are masked
 *     with a backslash, and the -ve string length of the converted name is
 *     returned to indicate that memagrep must be called.
 * Returns 0 when len < 1.
 *
 * The converted pattern is limited to MAX_PAT-1 characters; a longer one is
 * truncated at a whole-token boundary and a message is written to stderr.
 *
 * NOTE(review): the converted pattern can be longer than the input (up to
 * twice as long) and is copied back into buf; the caller's buffer is assumed
 * to be large enough -- verify against callers.  buf is also assumed to be
 * NUL-terminated at buf[len].
 */
int
convert2agrepregexp(buf, len)
char *buf;
int len;
{
	char	tbuf[MAX_PAT];
	int	i=0, j=0;
	int	need;		/* output characters needed for the current input character */
	int	toolong = 0;	/* set when the converted pattern does not fit in tbuf */
	char	c;

	if (len < 1) return 0;

	/* Ignore '*' at the end, unless it is escaped by a preceding backslash */
	if ((buf[len-1] == '*') && ((len == 1) || (buf[len-2] != '\\'))) {
		buf[len-1] = '\0';
		len--;
	}

	/* Ignore '*' at the beginning: shift the rest (and the terminator) down */
	if (buf[0] == '*') {
		for (i=0; i<len; i++)
			buf[i] = buf[i+1];
		len--;
	}

	/* Only '*'s were given: match everything */
	if (len < 1) {
		buf[0] = '.';
		buf[1] = '*';
		buf[2] = '\0';
		return -2;
	}

	/* Look for an unescaped character that makes this a regular expression */
	for (i=0; i<len; i++) {
		if (buf[i] == '\\') i++;
		else if ((buf[i] == '?') || (buf[i] == '*')
			|| (buf[i] == '$') || (buf[i] == '^')) break;
	}
	if (i >= len) return len;

	i = j = 0;
	while ((i<len) && (buf[i] != '\0')) {
		c = buf[i];

		/* Work out how much room this character needs before writing it */
		if (c == '\\')
			need = ((i+1 < len) && (buf[i+1] != '\0')) ? 2 : 1;
		else if ((c == '*') || needs_agrep_escape(c))
			need = 2;
		else
			need = 1;

		/* Always keep one byte of tbuf free for the terminator */
		if (j + need >= MAX_PAT) {
			toolong = 1;
			break;
		}

		if (c == '\\') {
			/* copy the backslash and what it protects without interpreting them */
			tbuf[j++] = buf[i++];
			if (need == 2) tbuf[j++] = buf[i++];
		}
		else if (needs_agrep_escape(c)) {
			tbuf[j++] = '\\';
			tbuf[j++] = c;
			i++;
		}
		/* Interpret ONLY ? and * in file-names */
		else if (c == '?') {
			tbuf[j++] = '.';
			i++;
		}
		else if (c == '*') {
			tbuf[j++] = '.';
			tbuf[j++] = '*';
			i++;
		}
		else tbuf[j++] = buf[i++];
	}
	tbuf[j] = '\0';

	if (toolong)
		fprintf(stderr, "glimpseindex: pattern '%s' too long\n", buf);

	strcpy(buf, tbuf);
	return -j;	/* strlen-compatible, -ve to indicate memagrep must be called */
}
/* -----------------------------------------------------------------
input: word, a string of characters, and len, the number of characters
       of it to hash.
output: a hash value in the range 0 .. 65535.
hash function: the low four bits of each of the first four characters
       are concatenated (first character most significant); a word
       shorter than four characters contributes only the characters
       it has.  Every remaining character is then added to the hash
       value, which is ANDed with the 16-bit mask after each addition.
note:  remaining characters are added as plain char, so bytes >= 0x80
       contribute differently depending on whether char is signed.
bug-fixes in all hashing functions: Chris Dalton
---------------------------------------------------------------- */
int
hash64k(word, len)
char *word;
int len;
{
	const unsigned int low4 = 0xf;
	const unsigned int low16 = 0xffff;
	unsigned int h = 0;
	int prefix = (len < 4) ? len : 4;
	int k;

	/* pack phase: 4 bits per character, at most 4 characters */
	for (k = 0; k < prefix; k++)
		h = (h << 4) | (word[k] & low4);

	/* fold phase: runs only when len > 4 */
	for (k = 4; k < len; k++)
		h = (h + word[k]) & low16;

	return (h & low16);
}
/*
 * Explicitly used with -B option.
 * Produces a hash value in the range 0 .. 0x3ffff (18 bits).  The first four
 * characters are packed alternately as 5 bits (characters 0 and 2) and
 * 4 bits (characters 1 and 3); every remaining character is added to the
 * value, which is ANDed with the 18-bit mask after each addition.
 */
int
hash256k(word, len)
char *word;
int len;
{
	const unsigned int low4 = 0xf;
	const unsigned int low5 = 0x1f;
	const unsigned int low18 = 0x3ffff;
	unsigned int h = 0;
	int prefix = (len < 4) ? len : 4;
	int k;

	/* pack phase: odd positions give 4 bits, even positions give 5 */
	for (k = 0; k < prefix; k++) {
		if (k & 1)
			h = (h << 4) | (word[k] & low4);
		else
			h = (h << 5) | (word[k] & low5);
	}

	/* fold phase: runs only when len > 4 */
	for (k = 4; k < len; k++)
		h = (h + word[k]) & low18;

	return (h & low18);
}
/*
 * Explicitly used for veryfastsearch without WORD_SORTED.
 * Produces a hash value in the range 0 .. 32767 (15 bits): the low five bits
 * of each of the first three characters are concatenated, and every
 * remaining character is added to the value, which is ANDed with the 15-bit
 * mask after each addition.
 * Using > 5 bits per character is a waste since there are only 26 lower
 * case letters.
 */
int
hash32k(word, len)
char *word;
int len;
{
	const unsigned int low5 = 0x1f;
	const unsigned int low15 = 0x7fff;
	unsigned int h = 0;
	int prefix = (len < 3) ? len : 3;
	int k;

	/* pack phase: 5 bits per character, at most 3 characters */
	for (k = 0; k < prefix; k++)
		h = (h << 5) | (word[k] & low5);

	/* fold phase: runs only when len > 3 */
	for (k = 3; k < len; k++)
		h = (h + word[k]) & low15;

	return (h & low15);
}
/*
 * 14-bit hash: simply the hash32k() value with its top bit dropped.
 * (The original author called this function utterly disgraceful.)
 */
int
hash16k(word, len)
char *word;
int len;
{
	int	wide = hash32k(word, len);

	return wide & 0x3fff;
}
/*
 * Explicitly used for -f and -a options: has low collisions (<=2) for filenames.
 * Produces a hash value in the range 0 .. 4095 (12 bits): the low three bits
 * of each of the first four characters are concatenated, and every remaining
 * character is added to the value, which is ANDed with the 12-bit mask after
 * each addition.
 */
int
hash4k(word, len)
char *word;
int len;
{
	const unsigned int low3 = 0x7;
	const unsigned int low12 = 0xfff;
	unsigned int h = 0;
	int prefix = (len < 4) ? len : 4;
	int k;

	/* pack phase: 3 bits per character, at most 4 characters */
	for (k = 0; k < prefix; k++)
		h = (h << 3) | (word[k] & low3);

	/* fold phase: runs only when len > 4 */
	for (k = 4; k < len; k++)
		h = (h + word[k]) & low12;

	return (h & low12);
}